# Sourcing External R Script
source("https://raw.githubusercontent.com/Cassava2050/PPD/main/utilities_tidy.R")
# Setting Up File Paths and Parameters
folder <- here::here("data//")
file <- "phenotype.csv"
skip_col <- 3
trial_interest <- "MDEPR"
year_interest <- 2022
# Loading Data
# The function 'read_cassavabase' should be defined in the sourced script
sel_data <- read_cassavabase(phenotypeFile = paste0(folder, file))
##
## Trials interested are:
## 2023111DMF1C_dona 2023112DMF1C_tani 2023113DMEPR_dona 2023114DMEPR_tani
The function ‘change_colname’ should also be defined in the sourced script
sel_data_kp <- change_colname(sel_data, NA)
## [1] "Good, the column names are standardized names now!"
obs_col <- c(
names(sel_data_kp)[str_detect(names(sel_data_kp), "obs_")],
"use_rep_number", "blockNumber",
"use_plot_number", "use_plot_width",
"use_plot_length"
)
sel_data_kp <- sel_data_kp %>%
mutate(across(all_of(obs_col), as.numeric))
# remove - , replace by _
names(sel_data_kp) = gsub("-", "_", names(sel_data_kp))
# Duplications in row and cols
duplicated_plot <- row_col_dup(sel_data_kp)
## [1] "Good, there is no duplicated combination of row and column."
cloneName_new_old <- check_clone_name(
clone_list = sel_data_kp$use_accession_name,
new_names = NA,
add_check = NULL
)
## Released varieties: [1] "Algodona" "Bellotti" "Brasilera" "Caiseli"
## [5] "Cantaclar04" "Cantaclar05" "Caribena" "Caselli"
## [9] "Catumare" "CC4-Guyana" "CC5-Guyana" "CG1141-1"
## [13] "CG489-31" "CM3306-19" "CM3306-4" "CM3555-6"
## [17] "CM4843-1" "CM4919-1" "CM523-7" "CM6119-5"
## [21] "CM6438-14" "CM6740-7" "CM6754-8" "CM9021-2"
## [25] "CMB8523" "COL1438" "COL1522" "COL2215"
## [29] "COL2737" "Colombiana" "Costena" "Cubana"
## [33] "Enana" "Enanita" "Gines" "GM273-57"
## [37] "GM4034-1" "HMC-1" "HMC1" "HMC1P12"
## [41] "INTA_amarilla" "KM94" "KU50" "Llenerita"
## [45] "Melua31" "NAT31" "Negrita" "Orense"
## [49] "Reina" "Reina_wx" "Rojita" "Romelia"
## [53] "Ropain" "SGB765-2" "SGB765-4" "Sinuana"
## [57] "SM1127-8" "SM1411-5" "SM2775-4" "SM2792-31"
## [61] "SMB2446-2" "Sucrena" "TAI" "TAI8"
## [65] "Venezolana" "Vergara" "Veronica"
## The other known clones: [1] "13SA05" "Azulita" "BB1" "BB2" "BB3"
## [6] "BK" "C19" "C243" "C33" "C39"
## [11] "C4" "C413" "Chirosa" "Chocoana" "CMB8527"
## [16] "CR52A2" "CR52A4" "CR60B10" "FalsaReina" "FALSAREINA"
## [21] "HB60" "HLS15" "IBA920057" "IBA972205" "IBA980505"
## [26] "KM140" "KM419" "KM505" "KM94" "KU50"
## [31] "MBUNDUMALI" "N30" "P13" "Rayong11" "SAUTI"
## [36] "TME287" "TME3" "TMEB419" "UNKNOWN1" "UNKOWN2"
## [1] "Good, the released names were correctly used"
## [1] "Now the standard names are used. Here are the released varieties"
## [1] "KU50_is_KM94"
## [1] "The check or released clones are:"
## newName
## 1 KU50_is_KM94
trial_standard <- sel_data_kp %>%
left_join(cloneName_new_old,
by = c("use_accession_name" = "accession_name_ori")
) %>%
select(-use_accession_name) %>%
rename(use_accession_name = use_accession_name.y)
trial_standard <- add_GIS(trial_standard)
## [1] "The locations are:"
## [1] "Dong Nai" "Tay Ninh"
## [1] "All locations are in the database."
# Get world map data for reference
vietnam_map <- map_data("world", region = "Vietnam")
# Create a data frame with your locations
locations <- data.frame(
Location = c("Tay Ninh", "Dong Nai"),
Latitude = c(11.3009, 11.1432),
Longitude = c(106.1107, 107.2742)
)
# Create a ggplot object for the map
static_map <- ggplot() +
geom_polygon(data = vietnam_map, aes(x = long, y = lat, group = group),
fill = "lightgray", col = "black", linewidth = 0.03) +
geom_point(data = locations, aes(x = Longitude, y = Latitude), size = 2, color = "red") +
geom_text_repel(data = locations, aes(x = Longitude, y = Latitude, label = Location),
size = 3) +
geom_text(aes(x = 108.5, y = 10, label = "Author: Luis Fdo. Delgado"), size = 1.5) +
labs(x = "Latitude", y = "Longitude") +
labs(title = "Locations in Vietnam", subtitle = "Tay Ninh and Dong Nai") +
theme(plot.title = element_text(face = "bold.italic"),
plot.subtitle = element_text(face = "italic")) +
coord_fixed(1.3) # This sets the aspect ratio
# Display the map
print(static_map)
# ggsave(paste("images\\map", trial_interest, ".png", sep = "_"),
# plot = static_map, units = "in", dpi = 300, width = 6, height = 5
# )
conducted_trials <-
trial_standard %>% group_by(use_trial_name, use_plant_date,use_harvest_date, use_location) %>%
summarise(n_gen = n_distinct(use_accession_name)) %>%
mutate(harvesting_time =
interval(ymd(use_plant_date), ymd(use_harvest_date)) %>% as.period,
harvesting_time = paste0(harvesting_time@month, "month ", harvesting_time@day, "day")) %>%
ungroup()
print(conducted_trials %>% rmarkdown::paged_table())
## use_trial_name use_plant_date use_harvest_date use_location n_gen
## 1 2023111DMF1C_dona 2023-March-02 2023-December-08 Dong Nai 324
## 2 2023112DMF1C_tani 2023-March-09 2023-December-08 Tay Ninh 218
## 3 2023113DMEPR_dona 2023-March-02 2023-December-07 Dong Nai 36
## 4 2023114DMEPR_tani 2023-March-09 2023-December-08 Tay Ninh 36
## harvesting_time
## 1 9month 6day
## 2 8month 29day
## 3 9month 5day
## 4 8month 29day
plants_plot <- trial_standard %>%
group_by(use_trial_name) %>%
count(obs_planted_number_plot)
print(plants_plot)
## # A tibble: 4 × 3
## # Groups: use_trial_name [4]
## use_trial_name obs_planted_number_plot n
## <chr> <dbl> <int>
## 1 2023111DMF1C_dona 4 648
## 2 2023112DMF1C_tani 4 432
## 3 2023113DMEPR_dona 10 108
## 4 2023114DMEPR_tani 10 108
plants_harvested <- trial_standard %>%
group_by(use_trial_name) %>%
count(obs_harvest_number) %>% arrange(desc(obs_harvest_number))
plants_plot %>% select(-n) %>%
left_join(plants_harvested %>%
summarise(harvested_plants = max(obs_harvest_number, na.rm = TRUE)), by = "use_trial_name")
# planted and harvested
plants_plot %>% select(-n) %>%
left_join(plants_harvested %>%
summarise(harvested_plants = max(obs_harvest_number, na.rm = TRUE)), by = "use_trial_name") %>%
write.table("clipboard", sep="\t", col.names = T, row.names = F)
plants_to_harvest <- plants_harvested %>%
ggplot(aes(x = factor(obs_harvest_number),
y = n, fill = factor(obs_harvest_number))) +
geom_col(col = 'black') +
#scale_fill_jco() +
#theme_xiaofei() +
theme(axis.text.x = element_text(vjust = 1, angle = 65),
text = element_text(size = 20),
legend.position="top")+
labs(x = "Harvest_plant_number", y = "Freq", fill = "Harvest_plant_number") +
facet_wrap(~ use_trial_name)
print(plants_to_harvest)
# ggsave(paste("images\\bar", trial_interest, ".png", sep = "_"),
# plot = plants_to_harvest, units = "in", dpi = 300, width = 9, height = 6)
trial_standard <- trial_standard %>%
mutate(obs_harvest_number_plan =
case_when(str_detect(use_trial_name, "2023111") ~ 4,
str_detect(use_trial_name, "2023112") ~ 4,
str_detect(use_trial_name, "2023113") ~ 10,
str_detect(use_trial_name, "2023114") ~ 10),
obs_germination_perc = obs_germinated_number_plot/obs_planted_number_plot * 100,
# 2) calculate area per plant
area_plant = (use_plot_length*use_plot_width)/obs_planted_number_plot,
# 3) calculate the yield_v4 obs_planted_number_plot
obs_yield_ha = ifelse(obs_yield_ha == 0, NA, obs_yield_ha),
obs_starch_content = ifelse(obs_starch_content == 0, NA, obs_starch_content),
obs_yield_ha_v2 = (((obs_root_weight_plot*10000)/(area_plant*obs_harvest_number_plan))/1000),
obs_starch_yield_ha = obs_starch_content * obs_yield_ha_v2 / 100)
library(plotly)
p1 <- trial_standard %>% ggplot() +
geom_point(aes(x = obs_yield_ha, y = obs_yield_ha_v2, color = use_plot_number), show.legend = F) +
facet_wrap(~use_trial_name) +
theme_xiaofei()
ggplotly(p1)
detach("package:plotly", unload = TRUE)
## Is numeric all traits?
is_numeric(trial_data = trial_standard)
## [1] "Good, all traits are numeric!"
# Get the tidy data
meta_info = names(trial_standard )[str_detect(names(trial_standard), "use_")]
meta_info = gsub("use_", "", meta_info)
meta_info
## [1] "year" "trial_name" "trial_type"
## [4] "plot_width" "plot_length" "plant_date"
## [7] "harvest_date" "location" "accession_name_syn"
## [10] "plot_name" "rep_number" "plot_number"
## [13] "row_number" "col_number" "check_test"
## [16] "notes" "accession_name" "check_released"
## [19] "latitude" "longitude" "altitude"
## [22] "department" "country" "ag_zone"
## [25] "location_short"
trial_tidy = trial_standard
names(trial_tidy)= gsub("use_", "", names(trial_standard))
# observations
trait_list = names(trial_tidy)[str_detect(names(trial_tidy), "obs_")]
trait_list = gsub("obs_", "", trait_list)
trait_list
## [1] "branch_number" "CBB_3mon" "CMD_1mon"
## [4] "CMD_3mon" "CMD_6mon" "CMD_9mon"
## [7] "height_1st_branch" "yield_ha" "root_weight_plot"
## [10] "vigor1_5" "vigor1_5_1mon" "lodging1_3"
## [13] "root_number_commercial" "planted_number_plot" "height"
## [16] "height_wt_leaf" "harvest_number" "root_number"
## [19] "germinated_number_plot" "starch_content" "harvest_number_plan"
## [22] "germination_perc" "yield_ha_v2" "starch_yield_ha"
names(trial_tidy)= gsub("obs_", "", names(trial_tidy))
trial_tidy = trial_tidy[c(meta_info, trait_list)]
# Boxplots and save in output folder
dev.off()
## null device
## 1
boxplot_traits(my_dat = trial_tidy,
trait_wanted = trait_list,
folder = paste0(here::here("output"), "/"),
trial_interest)
# Grouping boxplot
trait_wanted <- trait_list
plot_bxp <- trial_tidy %>%
pivot_longer(
cols = all_of(trait_wanted),
names_to = "var",
values_to = "values"
) %>%
filter(!var %in% c(
"stake_plant", "planted_number_plot",
"harvest_number", "root_weight_air",
"root_weight_water", "harvest_number_plan",
"root_rot_perc", "yield_ha_v2"
)) %>%
ggplot(aes(x = trial_name, y = values)) +
facet_wrap(~var,
ncol = 5, scales = "free_y"
) +
geom_violin(fill = "gray") +
geom_boxplot(width = 0.2) +
labs(x = NULL, y = NULL, title = "") +
theme_xiaofei() +
theme(
axis.text.x = element_text(size = 8, vjust = 1, angle = 65),
axis.text.y = element_text(size = 8),
plot.title = element_text(color = "black"),
strip.text.x = element_text(
size = 8, face = "bold.italic")
)
print(plot_bxp)
ggsave(paste0("images\\boxplot_", trial_interest, ".png"),
plot = plot_bxp, units = "in", dpi = 300, width = 16, height = 12
)
write.csv(trial_tidy, here::here("output", paste("01_", year_interest, trial_interest,
"_tidy_data4analysis_", ".csv", sep = "")), row.names = FALSE)